/*==============================================================================
IC - UK - Education from LFS

This do file brings in education data at the county level for the UK in 1979 
(first year detailed education data are available in the LFS)

The education variable of interest is highqualc-"The highest qualification held" 
The variable takes on the following values:
0 - First or higher degree				 
1 - Corp or grad member of professional institution
2 - HNC/SNC						
3 - Teaching qualification: Primary			
4 - Teaching qualification: Secondary 
5 - Nursing qualification				
6 - Recognised Trade Apprenticeship Completed		
7 - Recognised Trade Apprenticeship NOT Completed	
8 - ONC/OND					
9 - City and Guild
10 - A level						
11 - O level						
12 - CSE (other grades)
13 - Any other prof/vocational qualification
14 - Still studying
15 - None
16 - Not known
17 - Not stated
18 - Not applicable

Note: The universe is persons born between 1919 and 1963 (people between 
the ages of 16-60 in 1979)
 
==============================================================================*/

clear all
set more off

cd "$insheet_files/UK/LFS 1979/stata6"

use lfs79.dta

* var132 is the LFS "highest qualification held" code (0-18, see header)
rename var132 highqualc

* Broad attainment groups. Each indicator is 1 for rows in the group and
* missing otherwise, so a later collapse (sum) turns it into a head count.
* Codes 14 and 16-18 (still studying / not known / not stated / not
* applicable) are left out of every group.
gen edatt_primary    = 1 if inlist(highqualc, 11, 12, 15)
* Code 13 (any other prof/vocational qualification) is counted in
* edatt_yrs_12 below, so it has to be in one of the groups that make up
* edatt_total; otherwise edatt_total is smaller than the sum of edatt_yrs_*.
gen edatt_secondary  = 1 if inrange(highqualc, 1, 10) | highqualc == 13
gen edatt_university = 1 if highqualc == 0

* Denominator: 1 if the row falls in any group, 0 otherwise
* (rowtotal() treats missing as 0; it is the current name for rsum()).
egen edatt_total = rowtotal(edatt_primary edatt_secondary edatt_university)

* One indicator per number of years of schooling (0-16). Years with no
* matching qualification stay missing here and become 0 after collapse (sum).
forval n = 0/16 {
	gen edatt_yrs_`n' = .
}

replace edatt_yrs_8  = 1 if highqualc == 15                 // none
replace edatt_yrs_9  = 1 if highqualc == 12                 // CSE
replace edatt_yrs_10 = 1 if highqualc == 11                 // O level
replace edatt_yrs_11 = 1 if highqualc == 7                  // apprenticeship, not completed
replace edatt_yrs_12 = 1 if inlist(highqualc, 8, 10)        // ONC/OND or A level
replace edatt_yrs_12 = 1 if inlist(highqualc, 6, 9, 13)     // completed apprenticeship, City and Guild, other vocational
replace edatt_yrs_14 = 1 if inrange(highqualc, 1, 5)        // professional institution, HNC/SNC, teaching, nursing
replace edatt_yrs_16 = 1 if highqualc == 0                  // first or higher degree

/* GEOGRAPHY
urescompc - "Usual residence at metropolitan county and regional level"
1 - Wales
2 - Northern Ireland
3 - Tyne and Wear Metropolitan County
4 - Remainder of Northern region
5 - South Yorkshire Metropolitan County
6 - West Yorkshire Metropolitan County
7 - Remainder of Yorkshire and Humberside Region
8 - Greater Yorkshire Metropolitan County [sic - possibly Greater Manchester; TODO confirm against the LFS 1979 codebook]
9 - Merseyside Metropolitan County
10 - Remainder of North West region
11 - East Midlands region
12 - West Midlands Metropolitan County
13 - Remainder of West Midlands region
14 - East Anglia region
15 - Inner London
16 - Outer London
17 - Outer Metropolitan Area
18 - Outer South East
19 - South West Region
20 - Central Clydeside
21 - Remainder of Scotland
*/

* var83 is the usual-residence code described in the GEOGRAPHY list above
rename var83 urescomp

* Reduce the file to one row per residence code: keep only the region code
* and the attainment indicators, then add the indicators up within each
* region so that every edatt* variable becomes a count.
keep edatt* urescomp
collapse (sum) edatt*, by(urescomp)

*II. Match to modern NUTS 2
*
* Steps:
*   1. Give every LFS residence region the code of ONE modern NUTS 2 region
*      it covers, and pool LFS regions that share a code.
*   2. Build NUTS 1 and UK totals from those un-duplicated rows.
*   3. Copy each row to the remaining NUTS 2 regions it covers (expand), so
*      every NUTS 2 region inside an LFS region carries that region's counts.
*   4. Stack UK, NUTS 1 and NUTS 2 rows and save.
*
* Step 2 has to come before step 3: summing after the expand would count a
* region copied to k NUTS 2 codes k times in its NUTS 1 total and in the
* UK total.

gen nuts=""

* urescomp 3 is a metropolitan region (NUTS 3 UKC22 & UKC23), but the modern
* NUTS 2 region around it also contains the large, rural UKC21, which the
* LFS puts in urescomp 4 (UKC11-UKC14 & UKC21). Don't want to classify the
* larger area as having the same education level as just the metro region,
* so both codes are pooled under one label and later copied to UKC1 and UKC2.
replace nuts = "UKC1" if urescomp==3 | urescomp==4

replace nuts = "UKL1" if urescomp==1
replace nuts = "UKN0" if urescomp==2
replace nuts = "UKE3" if urescomp==5
replace nuts = "UKE4" if urescomp==6
replace nuts = "UKE1" if urescomp==7
* NOTE(review): the header labels code 8 "Greater Yorkshire Metropolitan
* County". If it is really Greater Manchester, it belongs in UKD3 rather
* than UKE2 - confirm against the LFS 1979 codebook before changing.
replace nuts = "UKE2" if urescomp==8
replace nuts = "UKD7" if urescomp==9
replace nuts = "UKD1" if urescomp==10
replace nuts = "UKF1" if urescomp==11
replace nuts = "UKG3" if urescomp==12
replace nuts = "UKG1" if urescomp==13
replace nuts = "UKH1" if urescomp==14
replace nuts = "UKI1" if urescomp==15
replace nuts = "UKI2" if urescomp==16
replace nuts = "UKJ1" if urescomp==17
replace nuts = "UKJ3" if urescomp==18
replace nuts = "UKK1" if urescomp==19
replace nuts = "UKM3" if urescomp==20
replace nuts = "UKM2" if urescomp==21

* Pool LFS regions that share a label (urescomp 3 and 4 -> UKC1)
collapse (sum) edatt*, by(nuts)

* NUTS 1 and UK totals, taken while every LFS region appears exactly once
preserve
	gen nuts1 = substr(nuts,1,3)
	collapse (sum) edatt*, by(nuts1)
	rename nuts1 nuts

	* tempfile stores the path in local macro nuts1; refer to it as `nuts1'
	tempfile nuts1
	save `nuts1'

	collapse (sum) edatt*
	gen nuts = "UK"

	tempfile uk
	save `uk'
restore

* Number of NUTS 2 regions each row stands for
gen byte copies = 1
replace copies = 2 if inlist(nuts, "UKC1", "UKG1", "UKJ1", "UKJ3", "UKL1")
replace copies = 3 if inlist(nuts, "UKF1", "UKH1", "UKM2")
replace copies = 4 if inlist(nuts, "UKD1", "UKK1")
expand copies

* Relabel the 2nd, 3rd and 4th copy of each row with the other NUTS 2 codes
bys nuts: gen n = _n
replace nuts ="UKC2" if nuts=="UKC1" & n==2
replace nuts ="UKL2" if nuts=="UKL1" & n==2
replace nuts ="UKD6" if nuts=="UKD1" & n==2
replace nuts ="UKD3" if nuts=="UKD1" & n==3
replace nuts ="UKD4" if nuts=="UKD1" & n==4
replace nuts ="UKF2" if nuts=="UKF1" & n==2
replace nuts ="UKF3" if nuts=="UKF1" & n==3
replace nuts ="UKG2" if nuts=="UKG1" & n==2
replace nuts ="UKH2" if nuts=="UKH1" & n==2
replace nuts ="UKH3" if nuts=="UKH1" & n==3
replace nuts ="UKJ2" if nuts=="UKJ1" & n==2
replace nuts ="UKJ4" if nuts=="UKJ3" & n==2
replace nuts ="UKK2" if nuts=="UKK1" & n==2
replace nuts ="UKK3" if nuts=="UKK1" & n==3
replace nuts ="UKK4" if nuts=="UKK1" & n==4
replace nuts ="UKM5" if nuts=="UKM2" & n==2
replace nuts ="UKM6" if nuts=="UKM2" & n==3
drop n copies
sort nuts

tempfile nuts2
save `nuts2'

* Stack the three levels: UK row first, then NUTS 1, then NUTS 2
use `uk', clear
append using `nuts1'
append using `nuts2'

* Only edatt_total and the edatt_yrs_* counts are kept in the output
drop edatt_primary edatt_secondary edatt_university

save "$dta_files/IC_UK_education.dta", replace


